# -*- coding: utf-8 -*-
'''
Created on 21-09-2012

@author: LONG HOANG GIANG
'''
from CrawlerLib import commonlib, Http, html2text
from lxml import etree
import re
import sqlite3 as db
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../'))

def getChaptersLink():
    '''Collect the chapter links listed on the book's index page.

    Fetches the hard-coded index URL through ``Http.getXMLTree`` and walks
    every ``<a>`` found under ``//div[@class='entry']//ul/li``.

    Returns a list of dicts, one per anchor, with two keys:
      * ``name`` -- the anchor text (via ``commonlib.stringify``) with the
        book-title prefix removed;
      * ``link`` -- the anchor's ``href`` attribute (``None`` when absent).
    '''
    index_url = 'http://trandangkhoa.vn/toi-tai-gioi-ban-cung-the-chi-la-bat-da/'
    # Every chapter title on the index page repeats the book title; drop it.
    title_prefix = 'Tôi Tài Giỏi, Bạn Cũng Thế! – '

    anchors = Http.getXMLTree(index_url).xpath("//div[@class='entry']//ul/li/a")
    chapters = []
    for anchor in anchors:
        href = anchor.get('href')
        title = commonlib.stringify(anchor).replace(title_prefix, '')
        chapters.append({'name': title, 'link': href})
    return chapters

def getHtmlOfChapter(chapter):
    '''Download one chapter page and return its text with ``<br />`` breaks.

    ``chapter`` is a dict with a ``name`` (used only for the progress line)
    and a ``link`` (the page URL), i.e. the shape of the items built by
    ``getChaptersLink``.

    The first ``div[@class='entry']`` of the page is taken as the content
    root (``IndexError`` if the page has none), trimmed, converted with
    ``html2text.html2text`` and echoed to stdout. The returned string is
    that converted text with every newline replaced by ``<br />``.
    '''
    print('> get html of chapter {0}'.format(chapter['name']))
    page_url = chapter['link']
    entry = Http.getXMLTree(page_url).xpath("//div[@class='entry']")[0]

    # NOTE(review): presumably strips everything that follows the "info"
    # div(s); the exact semantics live in commonlib -- confirm there.
    info_nodes = entry.xpath(".//div[@class='info']")
    commonlib.Etree.clean_following_sibling(info_nodes, True)

    # Remove the first direct child <div> of the content root, if any.
    for leading_div in entry.xpath("./div[1]"):
        leading_div.getparent().remove(leading_div)

    # Image downloading is switched off (download=None); pass
    # download=commonlib.download_image to re-enable it.
    markup = etree.tounicode(entry)
    text = html2text.html2text(markup, page_url, download=None, path='/toitaigioi')
    print(text)
    return re.sub(r"\n", "<br />", text)

def createTable(c):
    '''(Re)create the ``tblChapters`` and ``tblDetail`` tables.

    ``c`` is any object with an ``execute(sql)`` method (a DB-API cursor).
    Both tables are dropped first if they already exist, so calling this
    discards previously stored rows. Each table gets an index:
    ``chapter_idx`` on ``tblChapters(id)`` and ``detail_idx`` on
    ``tblDetail(chapterId)``.
    '''
    # Order matters: drops first, then each table followed by its index.
    schema_statements = (
        "DROP TABLE IF EXISTS tblChapters",
        "DROP TABLE IF EXISTS tblDetail",
        "CREATE TABLE tblChapters(`id` INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, `title` TEXT)",
        "CREATE INDEX chapter_idx ON tblChapters(id)",
        "CREATE TABLE tblDetail(`id` INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, `chapterId` INTEGER, `detail` TEXT)",
        "CREATE INDEX detail_idx ON tblDetail(chapterId)",
    )
    for statement in schema_statements:
        c.execute(statement)

#def process():
#    conn = db.connect('story')
#    c = conn.cursor()
#    createTable(c)
#    chapters = getChaptersLink()
#    for chapter in chapters:
#        c.execute("INSERT INTO tblChapters(title) VALUES(?)", [chapter['name'].decode('utf-8')])
#        chapterId = c.lastrowid
#        html = getHtmlOfChapter(chapter).decode('utf-8')
#        c.execute("INSERT INTO tblDetail(chapterId, detail) VALUES(?, ?)", (chapterId, html))
#    c.close()
#    conn.commit()
#    conn.close()
    
if __name__ == '__main__':
    # Manual run: fetch a single chapter and print its converted text.
    # Disabled alternatives kept for reference:
    #     process()
    #     print commonlib.download_image('http://trandangkhoa.vn/wp-content/uploads/2011/05/hc39-1024x878.jpg', '/toitaigioi', '')
    getHtmlOfChapter({'name': '', 'link': 'http://trandangkhoa.vn/ttgbct-c8-tri-nho-sieu-dang-danh-cho-tu/'})

    # os._exit() ends the process immediately and does NOT flush stdio
    # buffers, so flush explicitly; otherwise the text printed above can be
    # lost when stdout is a pipe or a file.
    sys.stdout.flush()
    sys.stderr.flush()
    # Reaching this line means the run succeeded, so report status 0 (an
    # uncaught exception above still exits non-zero via the traceback).
    # NOTE(review): os._exit is kept rather than sys.exit, presumably to end
    # the process even if library threads are still alive -- confirm.
    os._exit(0)
